{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from torchtext import data,datasets\n",
    "from torchtext.vocab import GloVe,FastText,CharNGram\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "from torch.autograd import Variable\n",
    "import torch\n",
    "from imdb import IMDB\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "is_cuda = torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "TEXT = data.Field(lower=True,fix_length=200,batch_first=True)\n",
    "LABEL = data.Field(sequential=False,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train, test = IMDB.splits(TEXT, LABEL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300),max_size=10000,min_freq=10)\n",
    "LABEL.build_vocab(train,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=32, device=-1)\n",
    "train_iter.repeat = False\n",
    "test_iter.repeat = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch = next(iter(train_iter))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_vocab = len(TEXT.vocab)\n",
    "hidden_size = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "class IMDBCnn(nn.Module):\n",
    "    \n",
    "    def __init__(self,vocab,hidden_size,n_cat,bs=1,kernel_size=3,max_len=200):\n",
    "        super().__init__()\n",
    "        self.hidden_size = hidden_size\n",
    "        self.bs = bs\n",
    "        self.e = nn.Embedding(n_vocab,hidden_size)\n",
    "        self.cnn = nn.Conv1d(max_len,hidden_size,kernel_size)\n",
    "        self.avg = nn.AdaptiveAvgPool1d(10)\n",
    "        self.fc = nn.Linear(1000,n_cat)\n",
    "        self.softmax = nn.LogSoftmax(dim=-1)\n",
    "        \n",
    "    def forward(self,inp):\n",
    "        bs = inp.size()[0]\n",
    "        if bs != self.bs:\n",
    "            self.bs = bs\n",
    "        e_out = self.e(inp)\n",
    "        cnn_o = self.cnn(e_out) \n",
    "        cnn_avg = self.avg(cnn_o)\n",
    "        cnn_avg = cnn_avg.view(self.bs,-1)\n",
    "        fc = F.dropout(self.fc(cnn_avg),p=0.5)\n",
    "        return self.softmax(fc)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = IMDBCnn(n_vocab,hidden_size,n_cat=3,bs=32,kernel_size=2)\n",
    "model = model.cuda()\n",
    "\n",
    "optimizer = optim.Adam(model.parameters(),lr=1e-3)\n",
    "\n",
    "def fit(epoch,model,data_loader,phase='training',volatile=False):\n",
    "    if phase == 'training':\n",
    "        model.train()\n",
    "    if phase == 'validation':\n",
    "        model.eval()\n",
    "        volatile=True\n",
    "    running_loss = 0.0\n",
    "    running_correct = 0\n",
    "    for batch_idx , batch in enumerate(data_loader):\n",
    "        text , target = batch.text , batch.label\n",
    "        if is_cuda:\n",
    "            text,target = text.cuda(),target.cuda()\n",
    "        \n",
    "        if phase == 'training':\n",
    "            optimizer.zero_grad()\n",
    "        output = model(text)\n",
    "        loss = F.nll_loss(output,target)\n",
    "        \n",
    "        running_loss += F.nll_loss(output,target,size_average=False).data[0]\n",
    "        preds = output.data.max(dim=1,keepdim=True)[1]\n",
    "        running_correct += preds.eq(target.data.view_as(preds)).cpu().sum()\n",
    "        if phase == 'training':\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "    \n",
    "    loss = running_loss/len(data_loader.dataset)\n",
    "    accuracy = 100. * running_correct/len(data_loader.dataset)\n",
    "    \n",
    "    print(f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}')\n",
    "    return loss,accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/vishnu/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:14: DeprecationWarning: generator 'Iterator.__iter__' raised StopIteration\n",
      "  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training loss is  0.61 and training accuracy is 16361/25000     65.44\n",
      "validation loss is  0.48 and validation accuracy is 19111/25000     76.44\n",
      "training loss is  0.39 and training accuracy is 20595/25000     82.38\n",
      "validation loss is  0.53 and validation accuracy is 19199/25000      76.8\n",
      "training loss is  0.29 and training accuracy is 21918/25000     87.67\n",
      "validation loss is  0.45 and validation accuracy is 20203/25000     80.81\n",
      "training loss is  0.23 and training accuracy is 22683/25000     90.73\n",
      "validation loss is  0.45 and validation accuracy is 20537/25000     82.15\n"
     ]
    }
   ],
   "source": [
    "train_losses , train_accuracy = [],[]\n",
    "val_losses , val_accuracy = [],[]\n",
    "\n",
    "for epoch in range(1,5):\n",
    "\n",
    "    epoch_loss, epoch_accuracy = fit(epoch,model,train_iter,phase='training')\n",
    "    val_epoch_loss , val_epoch_accuracy = fit(epoch,model,test_iter,phase='validation')\n",
    "    train_losses.append(epoch_loss)\n",
    "    train_accuracy.append(epoch_accuracy)\n",
    "    val_losses.append(val_epoch_loss)\n",
    "    val_accuracy.append(val_epoch_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
